* Work from the folder that holds the do files and the cleaned data
cd "/Users/Doug.Ahler/Library/CloudStorage/Box-Box/Research_Insights_and_Innovations/Secure_Retirement_Trust_RII/Financial_Health_and_Knowledge_RII/2023_Financial_Health_Survey_Rerun/Research_and_Evaluation/Data/Do files and cleaned data"

* Load the survey file, replacing whatever is currently in memory
use "FH2023_Final_Deidentified.dta", clear

* Declare the survey design: each observation is its own sampling unit,
* probability weights come from `weight', strata are defined by `language',
* and strata with a single sampling unit are treated as certainty units
svyset _n [pweight = weight], strata(language) singleunit(certainty)

* Restrict to respondents with a non-missing first food hardship item
* (the 386 who answered the hardship questions)
keep if foodhardship_1_recode != .

* Create binary indicators
* For every hardship item the value 2 is recoded to 0; all other values are
* left untouched (presumably 1 = yes and 2 = no -- confirm against the codebook)
	* Food
	recode foodhardship_1_recode foodhardship_2_recode foodhardship_3_recode (2 = 0)

	* Housing
	recode hardship_2 hardship_3 hardship_4 hardships_part2_1 hardships_part2_2 (2 = 0)
	
	* Gender indicator for regressions
	* male = 1 when gender == 2 and 0 for every other reported gender.
	* Respondents whose gender is missing are left missing instead of being
	* coded 0: the demographics section of this file notes that gender has
	* missing observations, and coding them 0 would silently place them in the
	* comparison group. Regressions that include male now drop those rows.
	gen male = (gender == 2) if !missing(gender)
	
	* AP indicator
	* ap = 1 when home_employer_type == 1, otherwise 0 (including when
	* home_employer_type is missing).
	* NOTE(review): if home_employer_type can be missing, decide whether those
	* rows should be missing here too, as is done for male above.
	gen ap = (home_employer_type == 1)

* Create indices
* Each index is the row sum of its 0/1 items; rowtotal() counts a missing item
* as 0, so the indices themselves are never missing
egen foodhardship_index = rowtotal(foodhardship_1_recode foodhardship_2_recode foodhardship_3_recode)
egen housinghardship_index = rowtotal(hardship_2 hardship_3 hardship_4 hardships_part2_1 hardships_part2_2)

* Calculate rate of having at least one hardship
* oneplus = 1 when either index is above zero, otherwise 0
gen oneplus = (foodhardship_index > 0 | housinghardship_index > 0)
svy: tab oneplus, col

* Keep data needed and save for plotting in R (I know, I know this is lazy/cheating, but I'm in a crunch here)
* keep foodhardship_index housinghardship_index cg_income_change weight
* save "20240905_forRplots.dta", replace

*** Run analyses, in order of how they appear in the paper

* Demographics of sample 
foreach demo of varlist gender bipoc {
	svy: tabulate `demo'
}

* Main table of hardships: In Liz K.'s .do file

* Regressions of each hardship index on income volatility with control variables
* (first appear in description of material hardships by demographics in
* "Material Hardship Prevalence" section).
* The food hardship index is run first, then the housing hardship index; within
* each outcome the models run in the same order as they appear in the paper.
foreach outcome of varlist foodhardship_index housinghardship_index {
	* One predictor at a time
	svy: regress `outcome' i.cg_income_change
	svy: regress `outcome' i.income
	svy: regress `outcome' bipoc
	svy: regress `outcome' male
	* All four predictors together
	svy: regress `outcome' i.cg_income_change i.income bipoc male
		* Adding in NELP and AP for R&R
		svy: regress `outcome' lep
		svy: regress `outcome' ap
		svy: regress `outcome' i.cg_income_change i.income bipoc male lep ap
}

* Means of hardship indices
* (a survey regression with no predictors reports the weighted mean as _cons)
foreach outcome of varlist foodhardship_index housinghardship_index {
	svy: regress `outcome'
}

* Distribution of caregiving income volatility
svy: tabulate cg_income_change

* SEE R CODE FOR PLOTS

* Demographics: Note---there are missing obs for education, gender, lgbtq, disability, race, and ethnicity. Can't use the "mis" option with svy. Have to calculate weighted percentages by hand.
* How this section works: each svy: tab prints weighted counts and observation
* counts; the display lines underneath divide a hand-transcribed count by 386
* to get a weighted percentage. Where a final line of the form
* (386 - ...) / 386 is present, it gives the share left over, i.e. missing.
* The transcribed numbers must be re-entered whenever the data or weights change.
* NOTE(review): the svy: tabulate documentation lists a "missing" option that
* is spelled out in full; worth checking whether it removes the need for these
* hand calculations.
svy: tab agecat, count obs
svy: tab education, count obs
	display 33.21/386
	display 94.55/386
	display 100.9/386
	display 70.22/386
	display 51.93/386
	display 21.63/386
	display 13.41/386
	display (386 - 33.21 - 94.55 - 100.9 - 70.22 - 51.93 - 21.63 - 13.41) / 386
svy: tab gender, count obs
	display 310/386
	display 54.46/386
	display 9.692/386
	display 1.67/386
	display 9.978/386
	* The fourth count was previously entered as 1.67 above but 1.76 in the
	* line below; both now use 1.67. TODO: confirm against the svy: tab output.
	display (386 - 310 - 54.46 - 9.692 - 1.67 - 9.978) / 386
svy: tab lgbtq, count obs
	display 41.54/386
	display 327.6/386
	display 16.7/386
	display (386 - 41.54 - 327.6 - 16.7) / 386
svy: tab disability, count obs
	display 65.65 / 386
	display 297.2 / 386
	display 21.63 / 386
	display 1.6 / 386
* Race (second line gets weighted %)
* Each race_# variable is tabulated separately; the display line divides one
* transcribed weighted count by 386 (presumably the count for respondents who
* selected that category -- confirm against the tab output)
	* White
	svy: tab race_1, count obs
	display 275 / 386
	* Black
	svy: tab race_2, count obs
	display 24.92 / 386
	* Asian
	svy: tab race_3, count obs
	display 47.71 / 386
	* NH/PI
	svy: tab race_4, count obs
	display 4.44 / 386
	* AI/AN
	svy: tab race_5, count obs
	display 12.46 / 386
	* Other
	svy: tab race_6, count obs
	display 15.36 / 386
	* PNA
	svy: tab race_7, count obs
	display 24.53 / 386
svy: tab latino, count obs
	display 61.6 / 386
	display 307.8 / 386
	display 15.03 / 386
	display 1.6 / 386
svy: tab income, count obs
	display 37.23/386
	display 116.4 / 386
	display 114.3 / 386
	display 56.37/ 386
	display 24.68 / 386
	display 14.42 / 386
	display 22.73 / 386
	
*****************************
*** SIPP ANALYSIS FOR R&R *** 
*****************************

* Switch to the folder holding the SIPP comparison files
cd "~/Library/CloudStorage/Box-Box/Research_Insights_and_Innovations/Secure_Retirement_Trust_RII/Financial_Health_and_Knowledge_RII/2019_Financial_Health_SIPP_Analysis/2023 SIPP_FH Comparison"

* Load data (replaces the survey data currently in memory)
use "SIPP2023_WAsubset.dta", clear

* Declare the survey design: each observation is its own sampling unit,
* probability weights come from WPFINWGT, no strata are declared, and
* single-unit strata are treated as certainty units
svyset _n [pweight = WPFINWGT], singleunit(certainty)

* Generate income cat variables
* income_cat bins householdyrincome into seven brackets that match the value
* labels below. The brackets are half-open (lower <= income < upper) so that
* non-integer incomes cannot fall between two brackets.
* Stata treats a numeric missing value as larger than any number, so the open
* top bracket must exclude missing explicitly; otherwise households with
* missing income would be counted as "$120,000 or more". Missing income stays
* missing in income_cat.
gen income_cat = .
replace income_cat = 1 if householdyrincome < 20000
replace income_cat = 2 if householdyrincome >= 20000  & householdyrincome < 40000
replace income_cat = 3 if householdyrincome >= 40000  & householdyrincome < 60000
replace income_cat = 4 if householdyrincome >= 60000  & householdyrincome < 80000
replace income_cat = 5 if householdyrincome >= 80000  & householdyrincome < 100000
replace income_cat = 6 if householdyrincome >= 100000 & householdyrincome < 120000
replace income_cat = 7 if householdyrincome >= 120000 & !missing(householdyrincome)

* The dollar signs are escaped (\$) so Stata does not try to expand text such
* as $20 as a global macro inside the double-quoted labels
label define inc_cat_lbl 1 "Less than \$20,000" 2 "\$20,000 - \$39,999" 3 "\$40,000 - \$59,999" 4 "\$60,000 - \$79,999" 5 "\$80,000 - \$99,999" 6 "\$100,000 - \$119,999" 7 "\$120,000 or more"
label values income_cat inc_cat_lbl

* Weighted counts and observation counts per income bracket
svy: tab income_cat, count obs

* Demographic table additions
* Stata treats a numeric missing value as larger than any number, so the
* open-ended top category of each variable below excludes missing explicitly.
* Without that guard, rows with missing TAGE or EEDUC would be counted in the
* highest age or education category.

* Age bands: under 40, 40-49, 50-59, 60 and over
gen agecat = .
replace agecat = 1 if TAGE < 40
replace agecat = 2 if inrange(TAGE, 40, 49)
replace agecat = 3 if inrange(TAGE, 50, 59)
replace agecat = 4 if TAGE >= 60 & !missing(TAGE)
svy: tab agecat, obs

* Education: collapse the EEDUC codes into six groups
* (see the SIPP codebook for the meaning of each EEDUC code)
gen educ_cat = .
replace educ_cat = 1 if inrange(EEDUC, 31, 38)
replace educ_cat = 2 if EEDUC == 39
replace educ_cat = 3 if inrange(EEDUC, 40, 41)
replace educ_cat = 4 if EEDUC == 42
replace educ_cat = 5 if EEDUC == 43
replace educ_cat = 6 if EEDUC > 43 & !missing(EEDUC)
svy: tab educ_cat, obs

* Race and origin are tabulated on their original codes
svy: tab ERACE, obs

svy: tab EORIGIN, obs
